#Project Goal and Project Data
The main purpose of our project is to estimate when the Covid-19 pandemic will end in countries with a population of over 3 million. The pandemic started in 2019, was declared a global pandemic in March 2020, has kept all of humanity from their normal lives, and has caused both material and moral damage.
Our initial data set contains all the data needed to predict the end of the Covid-19 pandemic (the total number of cases, the number of daily cases, the total number of deaths, and the number of daily deaths), except for the number of recovered patients. The recovered counts are taken from a second data set, which is loaded below. Our project data can be accessed at “https://raw.githubusercontent.com/owid/covid-19-data/master/public/data/owid-covid-data.csv”.
##Actions Taken
library(tidyverse)
library(readxl)
library(readr)
library(rmarkdown)
library(magrittr)
library(reshape2)
library(corrplot)
library(plotly)
library(patchwork)
library(hrbrthemes)
##Importing data from web to keep it updated.
# Download both source tables on every run so the analysis uses current data.
# totalcases: Our World in Data COVID-19 table (one row per location and date).
totalcases <- read.csv(
  file = "https://raw.githubusercontent.com/owid/covid-19-data/master/public/data/owid-covid-data.csv"
)
# forrecovered: country-level aggregated table; its Recovered column is used
# further below.
forrecovered <- read.csv(
  file = "https://raw.githubusercontent.com/datasets/covid-19/master/data/countries-aggregated.csv"
)
##Getting ready the data for cleaning process.
# Keep only the columns the model needs and give them CamelCase names.
# select() renames while selecting, so the column order matches the listing.
totalcases <- totalcases %>%
  select(
    Country = "location",
    Date = "date",
    TotalCases = "total_cases",
    NewCases = "new_cases",
    TotalDeath = "total_deaths",
    NewDeath = "new_deaths",
    TotalVaccinations = "total_vaccinations",
    PeopleVaccinated = "people_vaccinated",
    NewVaccination = "new_vaccinations",
    Population = "population"
  )
Our model assumes that we cannot reach a reliable result for a country whose population is less than 3 million.
We also check that each country has reported at least 365 days of data, in order to eliminate countries with insufficient data.
# Drop every location whose population is 3 million or less.
totalcases <- totalcases %>%
  dplyr::filter(Population > 3e6)
# Find the countries that have at least 365 daily records.
# countries: every distinct country left after the population filter.
# x:         the subset of `countries` with >= 365 rows in totalcases.
countries <- unique(totalcases$Country)
# Count rows per country in one pass instead of growing `x` inside a loop.
rows_per_country <- vapply(
  countries,
  function(country) sum(totalcases$Country == country),
  integer(1)
)
x <- countries[rows_per_country >= 365]
This loop showed that only Turkmenistan falls outside these limits.
# Remove every Turkmenistan row.
# A logical vector must not be negated with `-`: that turns TRUE/FALSE into
# -1/0, which drops only the first row of the table. Use a logical mask with
# `!=` instead.
totalcases <- totalcases[totalcases$Country != "Turkmenistan", ]
#Let’s do the necessary actions to give the number of people recovered in Turkey.
# Keep only the Turkey rows of the recovered table, then discard its first
# 49 rows.
# NOTE(review): the 49-row offset presumably aligns this table with the first
# Turkey row in totalcases - confirm, since it is hard-coded.
forrecovered <- forrecovered[forrecovered[["Country"]] == "Turkey", ][-seq_len(49), ]
#Up to this point, we have carried out the necessary data cleaning processes. After that, we will try to obtain the necessary parameters for our model.
```r
# The estimation focuses on Turkey, so extract Turkey's rows from the
# totalcases table that holds every country.
totaldatastur <- totalcases[totalcases[["Country"]] == "Turkey", ]
# Open the subset in the data viewer for a visual check.
view(totaldatastur)
# Cumulative recovered counts for Turkey, taken from the second data source.
totalrecovered <- forrecovered[["Recovered"]]
# Show the recovered series as numeric values.
print(as.numeric(totalrecovered))
## [1] 0 0 0 0 0 0 0 0 0
## [10] 0 0 0 0 0 26 26 42 70
## [19] 105 162 243 333 415 484 786 1042 1326
## [28] 1582 1846 2142 2423 2965 3446 3957 4799 5674
## [37] 7089 8631 10453 11976 13430 14918 16477 18491 21737
## [46] 25582 29140 33791 38809 44040 48886 53808 58259 63151
## [55] 68166 73285 78202 82984 86396 89480 92691 95780 98889
## [64] 101715 104030 106133 108137 109962 111577 112895 113987 114990
## [73] 116111 117602 118694 120015 121507 122793 124369 125963 126984
## [82] 127973 128947 129921 130852 131778 133400 135322 137969 141380
## [91] 144598 146839 147860 149102 150087 151417 152364 153379 154640
## [100] 156022 157516 158828 160240 161533 162848 164234 165706 167198
## [109] 169182 170595 171809 173111 175422 176965 178278 179492 180680
## [118] 182995 185292 187511 190390 191883 193217 194515 195671 196720
## [127] 197733 198820 199834 201013 202010 203002 204011 205214 206365
## [136] 207374 208477 209487 210469 211561 212557 213539 214535 215516
## [145] 216494 217497 218491 219506 220546 221574 222656 223759 224970
## [154] 226155 227089 228057 228980 229972 230969 231971 232913 233915
## [163] 234797 235569 236370 237165 237908 238795 239797 240792 241809
## [172] 242812 243839 244926 245929 246876 248087 249108 250092 251105
## [181] 252152 253245 254188 255407 256524 257731 258833 260058 261260
## [190] 262602 263745 264805 266117 267233 268435 269696 270723 271964
## [199] 273282 274514 275630 277052 278504 279749 281151 282657 283868
## [208] 285050 286370 287599 288954 290352 291754 293145 294357 295658
## [217] 296972 298368 299679 301098 302499 304003 305427 306939 308446
## [226] 310027 311520 313093 314390 316008 317519 319181 320762 322465
## [235] 323971 325486 327007 328824 330665 332379 334293 336221 338239
## [244] 340286 342501 344613 346794 348804 351102 353663 356375 359063
## [253] 361655 364573 367592 370825 374637 377891 381569 385480 388771
## [262] 392616 396227 400242 404727 409320 414141 418331 423142 427242
## [271] 431253 436270 441515 447361 452593 458109 1581565 1603780 1631944
## [280] 1661191 1691113 1721607 1753552 1779068 1800286 1834705 1866815 1901307
## [289] 1935292 1970803 1994034 2015230 2037433 2058437 2078629 2100650 2114760
## [298] 2126432 2136534 2146430 2155338 2164040 2172251 2182145 2190047 2198150
## [307] 2208451 2218464 2227927 2236938 2246047 2254052 2262864 2270769 2277987
## [316] 2283919 2290032 2296050 2301861 2307721 2314403 2322511 2331314 2340216
## [325] 2348309 2355409 2362415 2370431 2379070 2387384 2396199 2404416 2412505
## [334] 2420706 2429273 2437382 2445285 2453096 2461204 2468419 2475329 2482435
## [343] 2489624 2496833 2504050 2511548 2518758 2523760 2529450 2534996 2540293
## [352] 2546503 2556785 2565723 2572234 2578181 2586073 2593264 2601137 2608848
## [361] 2616139 2623924 2632030 2640669 2649862 2659093 2670273 2685560 2701076
## [370] 2716969 2734862 2752023 2770638 2788757 2807572 2825187 2844681 2863882
## [379] 2881643 2900829 2921037 2939929 2957093 2975108 2995033 3014226 3035043
## [388] 3059462 3082676 3105350 3130977 3159475 3194978 3232892 3268678 3301217
## [397] 3331411 3372629 3424733 3480146 3535040 3591550 3643734 3687590 3736537
## [406] 3792129 3844342 3909935 3970111 4022408 4073644 4121671 4167263 4212461
## [415] 4255714 4323897 4405199 4480381 4515819 4554037 4589501 4626799 4662328
## [424] 4691224 4716918 4743871 4766124 4801291 4856763 4894024 4932838 4947256
## [433] 4961120 4971245 4980516 4989787 4998639 5013111 5024313 5034316 5045508
## [442] 5057713 5070815 5083099 5094279 5105042 5114624 5124081 5131453 5139993
## [451] 5147610 5154771 5160774 5167350 5173186 5179833 5186728 5192945 5198057
## [460] 5202251 5206836 5211022 5215654 5219797 5224224 5228419 5232638 5237731
## [469] 5242945 5248862 5254708 5261892 5269294
# Attach the recovered counts to the Turkey table as a numeric column.
# The original assigned the numeric conversion and then immediately overwrote
# it with the unconverted vector; only the converted assignment is kept.
# Fail early if the two sources do not cover the same number of days, rather
# than relying on the hard-coded row offset used when trimming forrecovered.
stopifnot(length(totalrecovered) == nrow(totaldatastur))
# Print numbers with up to 9 significant digits from here on.
options(digits = 9)
totaldatastur$Recovered <- as.numeric(totalrecovered)
# Build the SIR compartments for Turkey.
#
# Missing values are handled BEFORE the arithmetic. Previously NA was replaced
# by 0 only at the end, so every day with a missing PeopleVaccinated value
# ended up with Removed, Suspectible and Immuned all equal to 0.
totaldatastur[is.na(totaldatastur)] <- 0
# NOTE(review): PeopleVaccinated is treated as a running total (see the OWID
# codebook - confirm). cummax() then carries the last reported value across
# days that were missing and have just been set to 0. Rows are assumed to be
# in date order.
totaldatastur$PeopleVaccinated <- cummax(totaldatastur$PeopleVaccinated)

## I (infectious) in SIR MODEL
totaldatastur$ActiveCases.I <- totaldatastur$TotalCases -
  totaldatastur$Recovered -
  totaldatastur$TotalDeath
## R (Removed) in SIR MODEL
totaldatastur$Removed <- totaldatastur$PeopleVaccinated +
  totaldatastur$Recovered +
  totaldatastur$TotalDeath
## S (Susceptible) in SIR MODEL
# The column keeps the spelling "Suspectible" because the plotting code
# further down refers to it by that name.
totaldatastur$Suspectible <- totaldatastur$Population -
  totaldatastur$Removed -
  totaldatastur$ActiveCases.I
## Immuned is added to see linear regression results
totaldatastur$Immuned <- totaldatastur$PeopleVaccinated + totaldatastur$Recovered
Correlation Heatmap
##Before starting the regression analysis, we need to do a correlation analysis.
# Rows 400-470 and the numeric columns only: columns 3-9 and 11-15, which
# skips Country, Date and Population (columns 1, 2 and 10).
forcor <- totaldatastur[seq(400, 470), c(seq(3, 9), seq(11, 15))]
# Spearman rank correlation between every pair of selected columns.
corr <- forcor %>%
  cor(method = "spearman")
# Draw the correlation matrix as a heatmap.
corrplot(corr)
Multi Regression Model
# Least-squares fit of NewCases on ActiveCases.I, using rows 400-470 of the
# Turkey table.
# NOTE(review): in R formula syntax `- Immuned` removes a term; it does not
# subtract a column. Immuned was never part of the model, so this is a
# one-predictor regression (the coefficient table in the output lists only
# ActiveCases.I). If a two-predictor model was intended, the formula would
# need `+ Immuned` - confirm with the author.
fit <- lm(NewCases ~ ActiveCases.I - Immuned, data=totaldatastur[(400:470),])
# 95% confidence intervals for the fitted coefficients.
confint(fit, level=0.95)
## 2.5 % 97.5 %
## (Intercept) -5422.667857792 -1179.414294240
## ActiveCases.I 0.092116495 0.106416288
# Print the fitted NewCases value for each row used in the regression.
fitted(fit)
## 62864 62865 62866 62867 62868 62869
## 47392.02194 48008.36696 48632.05770 49638.02331 50792.29291 51374.09323
## 62870 62871 62872 62873 62874 62875
## 51879.35916 52811.66911 51704.25325 50604.28236 49409.11501 48115.67393
## 62876 62877 62878 62879 62880 62881
## 47016.99351 46755.22803 46249.46577 45662.00726 42020.32043 36773.29750
## 62882 62883 62884 62885 62886 62887
## 31855.44194 30758.34978 29809.66087 28882.11571 27371.87683 25813.39449
## 62888 62889 62890 62891 62892 62893
## 24709.05588 23638.36858 22285.26840 21487.76221 19267.17303 14881.98092
## 62894 62895 62896 62897 62898 62899
## 12290.23470 9552.66616 9142.79523 8754.36584 8914.08546 9117.48230
## 62900 62901 62902 62903 62904 62905
## 9108.25053 9154.11160 8557.91765 8204.52930 7940.67923 7742.94058
## 62906 62907 62908 62909 62910 62911
## 7382.30578 6899.97038 6435.90000 6072.48574 5678.99376 5360.24938
## 62912 62913 62914 62915 62916 62917
## 5114.66433 5084.58661 4880.89198 4727.82320 4615.94998 4545.17304
## 62918 62919 62920 62921 62922 62923
## 4443.92132 4512.11733 4484.32274 4426.45044 4423.96878 4512.21660
## 62924 62925 62926 62927 62928 62929
## 4588.15539 4684.14599 4851.40986 5002.09624 5170.74984 5278.85094
## 62930 62931 62932 62933 62934
## 5401.34567 5481.65218 5496.54214 5583.10243 5565.92934
# Draw the diagnostic plots for the fitted linear model.
plot(fit)
# Print the residual summary, coefficient table, R-squared and F-statistic.
summary(fit)
##
## Call:
## lm(formula = NewCases ~ ActiveCases.I - Immuned, data = totaldatastur[(400:470),
## ])
##
## Residuals:
## Min 1Q Median 3Q Max
## -10129.320 -2877.048 840.827 1779.070 15404.978
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.30104e+03 1.06350e+03 -3.10394 0.0027685 **
## ActiveCases.I 9.92664e-02 3.58401e-03 27.69705 < 2.22e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5455.43 on 69 degrees of freedom
## Multiple R-squared: 0.917477, Adjusted R-squared: 0.916281
## F-statistic: 767.127 on 1 and 69 DF, p-value: < 2.22e-16
# Filled area chart of the Removed and Suspectible compartments over time.
# Fixes relative to the earlier version:
#   * mode must be "lines" ("line" is not a valid scatter mode);
#   * `fill` selects the fill target (e.g. "tozeroy"), not a colour; the
#     colour goes in `fillcolor`;
#   * dates are converted with as.Date() so the x axis is a real time axis.
p <- plot_ly(
  x = as.Date(totaldatastur$Date),
  y = totaldatastur$Removed,
  type = "scatter",
  mode = "lines",
  fill = "tozeroy",
  name = "Removed"
)
# Semi-transparent red so the larger Suspectible area does not hide Removed.
p <- add_trace(
  p,
  x = as.Date(totaldatastur$Date),
  y = totaldatastur$Suspectible,
  type = "scatter",
  mode = "lines",
  fill = "tozeroy",
  fillcolor = "rgba(255, 0, 0, 0.3)",
  name = "Suspectible"
)
p
# Side-by-side line charts of the Suspectible and Removed compartments.
# Fixes relative to the earlier version:
#   * "#green" is not a valid colour (a leading '#' requires hex digits);
#     the named colour "green" is used instead;
#   * Date is converted with as.Date() so each panel is one continuous line.
#     A character x axis makes ggplot treat every date as its own group and
#     emit "Each group consists of only one observation".
p1 <- ggplot(totaldatastur, aes(x = as.Date(Date), y = Suspectible)) +
  geom_line(color = "green", size = 2) +
  labs(x = "Date") +
  ggtitle("Date-Suspectible") +
  theme_ipsum()
p2 <- ggplot(totaldatastur, aes(x = as.Date(Date), y = Removed)) +
  geom_line(color = "grey", size = 2) +
  labs(x = "Date") +
  ggtitle("Date-Removed") +
  theme_ipsum()
# patchwork: place the two panels next to each other.
p1 + p2
## geom_path: Each group consists of only one observation. Do you need to adjust
## the group aesthetic?
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family not
## found in Windows font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family not
## found in Windows font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family not
## found in Windows font database
## geom_path: Each group consists of only one observation. Do you need to adjust
## the group aesthetic?
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
# 3D pie chart of the SIR compartments.
# The chunk had been collapsed onto a single commented line; it is restored
# here as runnable statements.
library(plotrix)
# A pie needs one value per label. The earlier version concatenated the three
# full columns (one value per day each) against only three labels, so the
# most recent day is used instead.
latest_day <- totaldatastur[nrow(totaldatastur), ]
slices <- c(
  latest_day$Suspectible,
  latest_day$ActiveCases.I,
  latest_day$Removed
)
lbls <- c("Suspectible", "Infected", "Removed")
pie3D(slices, labels = lbls, explode = 0.1, main = "Pie Chart of SIR ")